---
title: "Y02E Network"
output: html_document
date: "2025-04-18"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

rm(list = ls())
cat("\014")
```

```{r}
library(vroom)
library(tidyverse)

library(vroom)
library(ggplot2)
library(ggspatial)
library(ggthemes)
library(DescTools)
library(knitr)


library(kableExtra)
library(igraph)
library(ggraph)
library(tidygraph)
library(ggrepel)
```


*Load Data* 
```{r}
# sim <- vroom("data/y02e_w_citations.csv")

sim <- vroom("data/y02e_w_citations_2025.csv")

#sim$patent_year <- substr(sim$patent_date, 1, 4)

sim <- sim %>%
  filter(inventor_sequence == 0)
```

*These are all Y02E patents, only US inventors, citing US patents*  

*We look at all Y02 between 2000-2022 with citation to patents >= 1999 *
```{r}
sim <- sim %>%
  filter(patent_year >= 2000 & patent_year <= 2024 & citation_year >= 1999)
```

*Distinct patents*
```{r}
test <- sim %>%
  distinct(patent_id)
```

*40,073*

*Number of citation connections*
```{r}
citations <- sim %>%
  distinct(patent_id, citation_patent_id, .keep_all = T)
```
*555,238*


*Filter for distinct patent - cited_patent combinations*
```{r}
sim <- sim %>%
  distinct(patent_id, citation_patent_id, .keep_all = T)
```



```{r}
sim <- sim %>%
  rename(assignee_organization = disambig_assignee_organization)
```

*Recode assignee organization to reflect funding of federal agency*

```{r}
sim <- sim %>%
  mutate(
    assignee_organization = if_else(!is.na(level_one), level_one, assignee_organization),
    cited_organization = if_else(!is.na(cited_level_one), cited_level_one, cited_organization)
  )

```

*Standardize organization names*
```{r}
library(dplyr)
library(stringr)

# Extended pattern list
org_patterns <- list(
  "NASA" = "nasa|national aeronautics and space administration|the united states of america as represented by the administrator of the national aeronautics and space administration",
  "Department of Energy" = "department of energy|sandia|battelle|alliance for sustainable energy.*|uchicago argonne|united states energy research and development administration|lawrence livermore national laboratory|argonne national laboratory|lawrence berkeley national laboratory|Battell Energy Alliance, LLC",
  "Department of Defense" = "army|navy|air force",
  "SunPower" = "sunpower",
  "Boeing" = "boeing",
  "NI Standards" = "national institute of standards and technology|the united states of america, as represented by the secretary of commerce national institute of standards and technology|14th and constitution national institute of standards and technology"
)

# Apply standardization
for (org in names(org_patterns)) {
  pattern <- org_patterns[[org]]
  sim <- sim %>%
    mutate(
      assignee_organization = if_else(str_detect(assignee_organization, regex(pattern, ignore_case = TRUE)), org, assignee_organization),
      cited_organization = if_else(str_detect(cited_organization, regex(pattern, ignore_case = TRUE)), org, cited_organization)
    )
}

```

*Standardize names* 

*This requires a step below* 

*Turn into network object* 
```{r}

forward <- sim %>%
  filter(!is.na(cited_organization)) %>%
  dplyr::select(assignee_organization, cited_organization, patent_year) %>%
  rename(source = cited_organization, destination = assignee_organization, year_citation_was_made = patent_year)

sources <- forward %>%
  ungroup() %>%
  distinct(source, .keep_all = T) %>%
  dplyr::select(source) %>%
  rename(label = source)

#Create edge list
#
destinations <- forward %>%
  ungroup() %>%
  distinct(destination, .keep_all = T) %>%
  dplyr::select(destination) %>%
  rename(label = destination)

#sources$label <- as.numeric(sources$label)
  
#join
nodes <- full_join(sources, destinations, by = "label")

#create id column
nodes <- nodes %>% rowid_to_column("id")

#Create edgelist
per_route <- forward %>%
  group_by(source, destination, year_citation_was_made) %>%
  summarise(weight = n()) %>%
  ungroup()
#per_route

#per_route$source <- as.numeric(per_route$source)

edges <- per_route %>%
  left_join(nodes, by = c("source" = "label")) %>%
  rename(from = id)

edges <- edges %>%
  left_join(nodes, by = c("destination" = "label")) %>%
  rename(to = id) %>%
  rename(year = year_citation_was_made)

edges <- dplyr::select(edges, from, to, weight, year)

#We need to add a type attribute based on university, government agency, firm

nodes <- nodes %>%
  mutate(type = case_when(grepl("university|college|TEES|colorado school of mines|massachusetts institute|naval academy|CUNY Energy Institute|Marine Biological Laboratory|california institute|georgia tech|
                                rensselaer polytechnic institute|montana state|
                                rochester institute of technology|naval post graduate school|
                                ndsu|new jersey institute of technology|State of Oregon acting by and through the State Board of Higher Ed. on Behalf of Oregon State Univ.|
                                Rochester Institute of Technology|
                                Board of Regents of the Nevada System of Higher Education on behalf of the Desert Research Institute", label, ignore.case = TRUE) ~ "University", grepl("NASA|Department of Energy|Department of Defense|National Science Foundation|NI Standards|
                                Environmental Protection Agency|Department|Government", label, ignore.case = TRUE) ~ "National Lab",
                          TRUE ~ "Firm")) 

na <- nodes %>%
  filter(is.na(label))
#none
#removing self citations
edges_excl <- edges %>%
  filter(!from == to) %>%
  filter(from %in% nodes$id) %>%
  filter(to %in% nodes$id)


```

**REQUIRED STEP**
*Check organization names*

```{r}
library(stringdist)

label_pairs <- expand.grid(label1 = nodes$label, label2 = nodes$label) %>%
  filter(label1 != label2) %>%
  distinct() %>%
  mutate(similarity = 1 - stringdist(label1, label2, method = "jw"))

similar_labels <- label_pairs %>%
  filter(similarity > 0.97)
```

*Back to standardization*

```{r}

# Step 1: Create a group key before pivoting
similar_labels <- similar_labels %>%
  mutate(group_key = pmin(label1, label2))  # alphabetically first label

name_map <- similar_labels %>%
  pivot_longer(cols = c(label1, label2), values_to = "raw_name", names_to = NULL) %>%
  distinct(group_key, raw_name) %>%
  mutate(raw_name = as.character(raw_name)) %>%  # convert to character
  group_by(group_key) %>%
  mutate(canonical = min(raw_name)) %>%
  ungroup() %>%
  dplyr::select(raw_name, canonical)

```


```{r}
# Standardize assignee_organization
sim <- sim %>%
  left_join(name_map, by = c("assignee_organization" = "raw_name")) %>%
  mutate(assignee_organization = coalesce(canonical, assignee_organization)) %>%
  dplyr::select(-canonical)

# Standardize cited_organization
sim <- sim %>%
  left_join(name_map, by = c("cited_organization" = "raw_name")) %>%
  mutate(cited_organization = coalesce(canonical, cited_organization)) %>%
  dplyr::select(-canonical)

```

```{r}
sim <- sim %>%
  distinct(patent_id, citation_patent_id, .keep_all = T)
```

*Test*
```{r}
test <- sim %>%
  filter(assignee_organization %in% name_map$raw_name)
```

```{r}
rm(test)
```



*Turn into network object* 
```{r}

forward <- sim %>%
  filter(!is.na(cited_organization)) %>%
  dplyr::select(assignee_organization, cited_organization, patent_year) %>%
  rename(source = cited_organization, destination = assignee_organization, year_citation_was_made = patent_year)

sources <- forward %>%
  ungroup() %>%
  distinct(source, .keep_all = T) %>%
  dplyr::select(source) %>%
  rename(label = source)

#Create edge list
#
destinations <- forward %>%
  ungroup() %>%
  distinct(destination, .keep_all = T) %>%
  dplyr::select(destination) %>%
  rename(label = destination)

#sources$label <- as.numeric(sources$label)
  
#join
nodes <- full_join(sources, destinations, by = "label")

#create id column
nodes <- nodes %>% rowid_to_column("id")

#Create edgelist
per_route <- forward %>%
  group_by(source, destination, year_citation_was_made) %>%
  summarise(weight = n()) %>%
  ungroup()
#per_route

#per_route$source <- as.numeric(per_route$source)

edges <- per_route %>%
  left_join(nodes, by = c("source" = "label")) %>%
  rename(from = id)

edges <- edges %>%
  left_join(nodes, by = c("destination" = "label")) %>%
  rename(to = id) %>%
  rename(year = year_citation_was_made)

edges <- dplyr::select(edges, from, to, weight, year)

#We need to add a type attribute based on university, government agency, firm

nodes <- nodes %>%
  mutate(type = case_when(grepl("university|college|TEES|colorado school of mines|massachusetts institute|naval academy|CUNY Energy Institute|Marine Biological Laboratory|california institute|georgia tech|
                                rensselaer polytechnic institute|montana state|
                                rochester institute of technology|naval post graduate school|
                                ndsu|new jersey institute of technology|State of Oregon acting by and through the State Board of Higher Ed. on Behalf of Oregon State Univ.|
                                Rochester Institute of Technology|
                                Board of Regents of the Nevada System of Higher Education on behalf of the Desert Research Institute", label, ignore.case = TRUE) ~ "University", grepl("NASA|Department of Energy|Department of Defense|National Science Foundation|NI Standards|
                                Environmental Protection Agency|Department|Government", label, ignore.case = TRUE) ~ "National Lab",
                          TRUE ~ "Firm")) 

na <- nodes %>%
  filter(is.na(label))
#none
#removing self citations
edges_excl <- edges %>%
  filter(!from == to) %>%
  filter(from %in% nodes$id) %>%
  filter(to %in% nodes$id)


```


*Clean up environment* 
```{r}
rm(label_pairs, destinations, edges, forward, na, name_map, per_route, 
   similar_labels, sources)
```


*Recode labels*
```{r}
nodes <- nodes %>%
  mutate(label = case_when(label == "Department of Defense" ~ "DoD",
                           label == "Department of Energy" ~ "DOE",
                           label == "National Science Foundation" ~ "NSF",
                           label == "United States Government" ~ "US Gov.",
                           TRUE ~ label))
```




```{r}
write.csv(nodes, "nodes_yo2e.csv")
write.csv(edges_excl, "edges_y02e.csv")
```



```{r}
nodes <- vroom("nodes_yo2e.csv")
edges_excl <- vroom("edges_y02e.csv")
```


**Descriptive Network Statistics**


*Turn into igraph object*
```{r}
net <- graph_from_data_frame(d = edges_excl, vertices = nodes, directed = T)
```


```{r}
# Calculate the out-degree of each vertex
g.outd <- degree(net, mode = c("out"))
# View a summary of out-degree
table(g.outd)


# Find the vertex that has the maximum out-degree
which.max(g.outd)

#Create a dataframe of the out-degree distribution 

# Convert named vector to a long dataframe
long_deg <- data.frame(id = names(g.outd), Value = g.outd) %>%
  gather(key = "id", value = "Value") %>%
  mutate(id = as.numeric(id))

long_deg <- left_join(long_deg, nodes)

ordered_out_deg <- long_deg %>%
  arrange(desc(Value))

subset_out_deg <- head(ordered_out_deg, n = 20)

kable(subset_out_deg)
```


```{r}
# Calculate 99th percentile of out-degrees
quantile_99_outdeg <- quantile(g.outd, probs = 0.99, na.rm = TRUE)

print(quantile_99_outdeg)

```


*Descriptive Statistics of the top 100*
```{r}
subset_out_deg <- head(ordered_out_deg, n = 100)

mean(subset_out_deg$Value)
median(subset_out_deg$Value)
sd(subset_out_deg$Value)
```
*citations*

```{r}
outgoing_weights <- strength(net, mode = "out", loops = FALSE)

# Convert named vector to a long dataframe
long_cit <- data.frame(id = names(outgoing_weights), Value = outgoing_weights) %>%
  gather(key = "id", value = "Value") %>%
  mutate(id = as.numeric(id))

long_cit <- left_join(long_cit, nodes)

ordered_out_cit <- long_cit %>%
  arrange(desc(Value))

subset_out_cit <- head(ordered_out_cit, n = 15)

kable(subset_out_cit)
```


```{r}
quantile_99_outgoing <- quantile(outgoing_weights, probs = 0.99, na.rm = TRUE)

print(quantile_99_outgoing)

```


*Hub Score*

```{r}
hs <- hub_score(net, weights=NA)$vector

top_twenty <- hs[order(hs, decreasing = TRUE)[1:20]]

df <- as.data.frame(top_twenty)

df <- cbind(id = rownames(df), df)
rownames(df) <- 1:nrow(df)
df$id <- as.integer(df$id)

df <- left_join(df, nodes)

kable(df)
```



```{r}
quantile_99_hs <- quantile(hs, probs = 0.99, na.rm = TRUE)

print(quantile_99_hs)

```



```{r}
node_attributes <- V(net)$label

node_attributes

```

2 - DOD
16 - NSF 
24 - US Gov
28 - SunPower 
39 - NASA 
88 - DOE 


```{r}
library(ggrepel)
library(MASS)
library(RColorBrewer) 

# Here, I am specifying a specific color palette -- instead of using ggplot's 
# defaults. 

colors <- brewer.pal(n = 5, "Set1") 
# Here, I am choosing specific colors to use below

red <- colors[1] 
blue <- colors[2] 
green <- colors[3]
orange <- colors[4]
purple <- colors[5]
```


```{r}
node_attributes <- V(net)$label

node_attributes
```


*Calculate Annual Statistics*

```{r}
calculate_outdegrees_for_nodes <- function(net, node_labels, year_range) {
  outdegree_data <- data.frame(Year = year_range)
  
  for (label in node_labels) {
    outdegree_data[[label]] <- numeric(length(year_range))
    
    for (i in 1:length(year_range)) {
      year <- year_range[i]
      yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
      weighted_outdegree <- degree(yearly_network, mode = "out")
      specific_outdegree <- weighted_outdegree[label]
      outdegree_data[[label]][i] <- specific_outdegree
    }
  }
  
  return(outdegree_data)
}

node_labels <- c("2", "87")  
year_range <- 2000:2024  

outdegree_dataframe <- calculate_outdegrees_for_nodes(net, node_labels, year_range)
```


```{r}
# Define a function to calculate the 75th percentile of outdegree values for each year
calculate_percentile_outdegrees <- function(net, year_range) {
  percentile_data <- data.frame(Year = year_range, Percentile = numeric(length(year_range)))
  
  for (i in 1:length(year_range)) {
    year <- year_range[i]
    yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
    outdegrees <- degree(yearly_network, mode = "out")
    percentile_75 <- quantile(outdegrees, 0.999, na.rm = TRUE)
    percentile_data$Percentile[i] <- percentile_75
  }
  
  return(percentile_data)
}

percentile_outdegrees_dataframe <- calculate_percentile_outdegrees(net, year_range)

#############


names <- c("year", "DOD", "DOE")

colnames(outdegree_dataframe) <- names

# left joining the 99th percentile
outdegree_dataframe <- cbind(outdegree_dataframe, percentile_outdegrees_dataframe)

outdegree_dataframe <- outdegree_dataframe %>%
  dplyr::select(-Year)

outdegree_long <- outdegree_dataframe %>%
  pivot_longer(cols = 2:4, names_to = "org", values_to = "citations")

outdegree_long %>%
  filter(org %in% c("DOE", "DOD", "Percentile")) %>%
  filter(year < 2024) %>%
  ggplot(aes(x = year, y = citations, group = org, color = org)) +
  geom_line(aes(linetype = org)) +
  theme_tufte() +
  xlab("") + ylab("Out Degree") + 
  theme(legend.position = "none") +
  scale_color_manual(
    values = c("DOE" = green, 
               "DOD" = red, 
               "Percentile" = blue)) +
  scale_linetype_manual(
     values = c("DOE" = "longdash", 
                "DOD" = "solid",
                "Percentile" = "dotted")) +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.position = "none") + 
  scale_y_continuous(breaks = c(20, 200, 400, 600))
```



```{r}
deg <- outdegree_long %>%
  filter(org %in% c("DOE", "DOD", "Percentile")) %>%
  filter(year < 2024) %>%
  ggplot(aes(x = year, y = citations, group = org, color = org, linetype = org)) +
  geom_line(size = 1.2) +  # Thicker lines
  theme_clean() +
  xlab("") + 
  ylab("Out Degree") + 
  scale_color_manual(
    values = c("DOE" = green, 
               "DOD" = red, 
               "Percentile" = blue)) +
  scale_linetype_manual(
    values = c("DOE" = "longdash", 
               "DOD" = "solid",
               "Percentile" = "dotted")) +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 14, face = "bold"),
        legend.position = "none") +
  scale_y_continuous(breaks = c(20, 200, 400, 600)) +
  annotate("text", x = 2016, y = 320, label = "DOE", color = green, size = 5, hjust = 0) +
  annotate("text", x = 2017, y = 180, label = "DOD", color = red, size = 5, hjust = 0) +
  annotate("text", x = 2012, y = 50, label = "99th Percentile", color = blue, size = 4, hjust = 0)

deg
```




*Calculate Annual Hub Scores* 

```{r}
# Define the function to calculate hub scores
calculate_hub_scores_for_nodes <- function(net, node_labels, year_range) {
  hub_score_data <- data.frame(Year = year_range)
  
  for (label in node_labels) {
    hub_score_data[[label]] <- numeric(length(year_range))
    
    for (i in 1:length(year_range)) {
      year <- year_range[i]
      yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
      hub_scores <- hub_score(yearly_network, weights = NA)$vector
      specific_hub_score <- hub_scores[label]
      hub_score_data[[label]][i] <- specific_hub_score
    }
  }
  
  return(hub_score_data)
}

node_labels <- c("2", "87")  

year_range <- 2000:2024 

hub_score_dataframe <- calculate_hub_scores_for_nodes(net, node_labels, year_range)

calculate_percentile_hub_scores <- function(net, year_range) {
  percentile_data <- data.frame(Year = year_range, Percentile = numeric(length(year_range)))
  
  for (i in 1:length(year_range)) {
    year <- year_range[i]
    yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
    hub_scores <- hub_score(yearly_network, weights = NA)$vector
    percentile_75 <- quantile(hub_scores, 0.99, na.rm = TRUE)
    percentile_data$Percentile[i] <- percentile_75
  }
  
  return(percentile_data)
}


percentile_hub_scores_dataframe <- calculate_percentile_hub_scores(net, year_range)

names <- c("year", "DOD", "DOE")

colnames(hub_score_dataframe) <- names

hub_score_dataframe <- cbind(hub_score_dataframe, percentile_hub_scores_dataframe)

hub_score_dataframe <- hub_score_dataframe %>%
  dplyr::select(-Year)


hub_long <- hub_score_dataframe %>%
  pivot_longer(cols = 2:4, names_to = "org", values_to = "citations")

hub <- hub_long %>%
  filter(org %in% c("DOD", "DOE", "Percentile")) %>%
  filter(year < 2024) %>%
  ggplot(aes(x = year, y = citations, group = org, color = org, linetype = org)) +
  geom_line(size = 1.2) +  # Corrected size placement
  theme_clean() +
  xlab("") + 
  ylab("Hub Score") + 
  scale_color_manual(
    values = c("DOE" = green, 
               "DOD" = red, 
               "Percentile" = blue)) +
  scale_linetype_manual(
    values = c("DOE" = "longdash", 
               "DOD" = "solid",
               "Percentile" = "dotted")) +
  scale_y_continuous() +  # You can customize breaks if needed
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.position = "none"
  )

hub
```




*Annual outgoing citations* 

```{r}

# Define the function to calculate outgoing citations
calculate_outgoing_citations_for_nodes <- function(net, node_labels, year_range) {
  citations_data <- data.frame(Year = year_range)
  
  for (label in node_labels) {
    citations_data[[label]] <- numeric(length(year_range))
    
    for (i in 1:length(year_range)) {
      year <- year_range[i]
      yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
      outgoing_citations <- sum(E(yearly_network)[.from(label)]$weight)
      citations_data[[label]][i] <- outgoing_citations
    }
  }
  
  return(citations_data)
}

node_labels <- c("2", "87")  
year_range <- 2000:2023  

citations_dataframe <- calculate_outgoing_citations_for_nodes(net, node_labels, year_range)

calculate_percentile_citations <- function(net, year_range) {
  percentile_data <- data.frame(Year = year_range, Percentile = numeric(length(year_range)))
  
  for (i in 1:length(year_range)) {
    year <- year_range[i]
    yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
    citation_values <- E(yearly_network)$weight
    percentile_75 <- quantile(citation_values, 0.99, na.rm = TRUE)
    percentile_data$Percentile[i] <- percentile_75
  }
  
  return(percentile_data)
}

percentile_citations_dataframe <- calculate_percentile_citations(net, year_range)

names <- c("year", "DOD", "DOE")

colnames(citations_dataframe) <- names

# left joining the 99th percentile
citations_dataframe<- cbind(citations_dataframe, percentile_citations_dataframe)

citations_dataframe <- citations_dataframe %>%
    dplyr::select(-Year)


citations_long <- citations_dataframe %>%
  pivot_longer(cols = 2:4, names_to = "org", values_to = "citations")

 
cit <- citations_long %>%
  filter(org %in% c("DOE", "DOD", "Percentile")) %>%
  filter(year < 2024) %>%
  ggplot(aes(x = year, y = citations, group = org, color = org, linetype = org)) +
  geom_line(size = 1.2) +  # Thicker lines
  theme_clean() +
  xlab("") + 
  ylab("Citations") + 
  scale_color_manual(
    values = c("DOE" = green, 
               "DOD" = red, 
               "Percentile" = blue)) +
  scale_linetype_manual(
    values = c("DOE" = "longdash", 
               "DOD" = "solid",
               "Percentile" = "dotted")) +
  scale_y_continuous(breaks = c(10, 1000, 2000, 3000)) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold"),
    legend.position = "none"
  )

cit
```



*Eigenvectors*

```{r}
# Define the function to calculate eigenvector centrality
calculate_eigenvector_centrality_for_nodes <- function(net, node_labels, year_range) {
  centrality_data <- data.frame(Year = year_range)
  
  for (label in node_labels) {
    centrality_data[[label]] <- numeric(length(year_range))
    
    for (i in 1:length(year_range)) {
      year <- year_range[i]
      yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
      eigenvector_centrality <- eigen_centrality(yearly_network)$vector
      specific_eigenvector_centrality <- eigenvector_centrality[label]
      centrality_data[[label]][i] <- specific_eigenvector_centrality
    }
  }
  
  return(centrality_data)
}

node_labels <- c("2", "87")  
year_range <- 2000:2024 

eigenvector_centrality_dataframe <- calculate_eigenvector_centrality_for_nodes(net, node_labels, year_range)

## 

# Define a function to calculate the 75th percentile of eigen centrality values for each year
calculate_percentile_eigen_centrality <- function(net, year_range) {
  percentile_data <- data.frame(Year = year_range, Percentile = numeric(length(year_range)))
  
  for (i in 1:length(year_range)) {
    year <- year_range[i]
    yearly_network <- subgraph.edges(net, E(net)[year == year_range[i]])
    eigen_centrality <- eigen_centrality(yearly_network)$vector
    percentile_75 <- quantile(eigen_centrality, 0.99, na.rm = TRUE)
    percentile_data$Percentile[i] <- percentile_75
  }
  
  return(percentile_data)
}

percentile_eigen_centrality_dataframe <- calculate_percentile_eigen_centrality(net, year_range)

####

names <- c("year", "DOD", "DOE")

colnames(eigenvector_centrality_dataframe) <- names

# left joining the 99th percentile
eigenvector_centrality_dataframe <- cbind(eigenvector_centrality_dataframe, percentile_eigen_centrality_dataframe)

eigenvector_centrality_dataframe <- eigenvector_centrality_dataframe %>%
    dplyr::select(-Year)



colnames(eigenvector_centrality_dataframe)[4]  <- "99th Perc."


eigen_long <- eigenvector_centrality_dataframe %>%
  pivot_longer(cols = 2:4, names_to = "org", values_to = "EigenVec")

```



```{r}
eig <- eigen_long %>%
  filter(org %in% c("DOE", "DOD", "99th Perc.")) %>%
  filter(year < 2024) %>%
  ggplot(aes(x = year, y = EigenVec, group = org, color = org, linetype = org)) +
  geom_line(size = 1.2) +  # Thicker lines
  theme_clean() +
  xlab("") + 
  ylab("Eigenvector") +
  scale_color_manual(
    values = c("DOE" = green, 
               "DOD" = red, 
               "99th Perc." = blue)
  ) +
  scale_linetype_manual(
    values = c("DOE" = "longdash", 
               "DOD" = "solid",
               "99th Perc." = "dotted")
  ) +
  theme(
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 13, face = "bold"),
    legend.position = "none"  # Remove legend
  )

```



```{r}

library(cowplot)
library(ggpubr)

library("gridExtra")
library(grid)

plots_combined <- cowplot::plot_grid(
  cit, deg, hub, eig,
  ncol = 2, align = "hv"
)


plots_combined


width <- 8 

ggsave("fourwayplot_Y02E.png",plots_combined, width = width, height = width/1.618, units = "in")

```




*Clean up environment*
```{r}
rm(df, eig, eigen_long, eigenvector_centrality_dataframe, fourwayplot, hub,
   hub_long, hub_score_dataframe, long_cit, long_deg, ordered_out_cit, ordered_out_deg, org_patterns, outdegree_dataframe, outdegree_long, percentile_citations_dataframe, percentile_eigen_centrality_dataframe, percentile_hub_scores_dataframe, percentile_outdegrees_dataframe, subset_out_cit,
   subset_out_deg,  test)
```
```{r}
sum(edges_excl$weight)
```


**K-Cores** 


```{r}
# Calculate the coreness for each vertex
core_values <- coreness(net)

# Find the maximum k-core value
max_core <- max(core_values)
```

```{r}
# Define k value
k <- 163

# Extract the k-core subgraph
g_kcore <- induced_subgraph(net, vids = which(core_values >= k))

```


```{r}
library(igraph)
library(tidygraph)
library(ggraph)
library(ggrepel)


# Step 2: Define desired k value
k <- 160

# Step 3: Extract the k-core subgraph
g_kcore <- induced_subgraph(net, vids = which(core_values >= k))

# Step 4: Label key organizations, with override for "United States Government"
V(g_kcore)$lab <- ifelse(
  V(g_kcore)$label %in% c(
    "Department of Defense", 
    "Department of Energy", 
    "NASA", 
    "United States Government", 
    "National Science Foundation"
  ), 
  ifelse(
    V(g_kcore)$label == "United States Government", 
    "Federal Government", 
    V(g_kcore)$label
  ), 
  NA
)

g_kcore_tbl <- as_tbl_graph(g_kcore)

k_core <- ggraph(g_kcore_tbl, layout = "drl") +
  geom_edge_link(color = "lightgray", alpha = 0.25) +
  geom_node_point(aes(color = V(g_kcore)$type), size = 4, show.legend = FALSE) +
  geom_text_repel(
    aes(x = x, y = y, label = lab), 
    color = "black", 
    size = 3, 
    box.padding = 0.35, 
    point.padding = 0.5, 
    segment.color = "gray50"
  ) +
  scale_color_manual(values = c(
    "Firm" = "purple", 
    "University" = "yellow", 
    "National Lab" = "green"
  )) +
  theme_void()


k_core

width <- 8 

ggsave("k-cores_Y02E.png",k_core, width = width, height = width/1.618, units = "in")
```




*List of Core*

```{r}

institutions <- c("DoD", "General Motors LLC", "International Business Machines Corporation", "E.I. DU PONT DE NEMOURS AND COMPANY", "NSF",  
"US Gov.", "General Electric Company",                    
"Honeywell International Inc.",           
"NASA",                                        
"GM GLOBAL TECHNOLOGY OPERATINS LLC",        
"MASSACHUSETTS INSTITUTE OF TECHNOLOGY",    
"DOE",                                         
"3M Innovaative Properties Company",          
"Ford Global Technologies, L.L.C",             
"Boeing",                                      
 "CALIFORNIA INSTITUTE OF TECHNOLOGY",          
"Helwett-Packard Development Company, L.P.")

# Create a comma-separated string with line breaks after every few items
institutions_str <- paste(institutions, collapse = ", ")
formatted_institutions <- strwrap(institutions_str, width = 80)
formatted_text <- paste(formatted_institutions, collapse = "\n")

# Load the necessary package for exporting text as an image
library(grid)
library(gridExtra)

# Convert the formatted text into a grid graphic
text_plot <- textGrob(formatted_text, x = 0.5, y = 0.5, just = "center", gp = gpar(fontsize = 10))

# Save the plot as a PNG file in A4 dimensions
png("institutions_list_A4.png", width = 2480, height = 3508, res = 300)  # A4 size at 300 dpi
grid.draw(text_plot)
dev.off()


```







```{r}
library(intergraph)
library(ergm.count)
```



*Create Government node attribute*
```{r}
nodes <- nodes %>%
  mutate(attribute = if_else(type == "National Lab", 1,0))

net <- graph_from_data_frame(d = edges_excl, vertices = nodes, directed = T)

```


*Filter network to nodes with at least one outgoing citation* 
```{r}

outdeg <- degree(net, mode = "out")

# Filter the network to only include these nodes
net <- induced_subgraph(net, vids = V(net)[outdeg > 0])

```



*Create a label for citing a government node*

```{r}
# Identify nodes with the attribute of interest
gov_nodes <- V(net)[attribute == 1]


# Create an attribute for outdegree value 
V(net)$outdegree <- degree(net, mode = "out")

V(net)$indegree <- degree(net, mode = "in")

```


```{r}
# Set cites_gov to 1 for nodes with incoming ties from governmental nodes

V(net)$cites_gov <- sapply(V(net), function(v) {
  any(gov_nodes %in% neighbors(net, v, mode = "in"))
})
# Convert logical TRUE/FALSE to binary 1/0
V(net)$cites_gov <- as.numeric(V(net)$cites_gov)

```

```{r}
net_network <- asNetwork(net)
```


Do nodes that cite government nodes tend to receive more citations themselves?

*Basic Model*
```{r}
model <- ergm(net_network ~ edges + nodeicov("cites_gov"))
summary(model)
```



